import pandas as pd
import os

def process_tsv_file(tsv_file, masterlist_df):
    """Look up one value per masterlist row in a headerless TSV file.

    Column 7 of the TSV (0-based) is the lookup key and column 8 the value.
    Each masterlist row is matched on its first column; rows with no value
    found that way fall back to their second column. Rows that match on
    neither column get the string 'NA'.

    Args:
        tsv_file: Path or file-like object accepted by ``pd.read_csv``.
        masterlist_df: DataFrame whose first two columns hold the primary and
            the fallback identifier. It is read only, never modified.

    Returns:
        A Series named 'match' with exactly one entry per masterlist row, in
        masterlist order, indexed 0..n-1.

    Raises:
        ValueError: If the TSV has fewer than 9 columns or the masterlist has
            fewer than 2 columns.
    """
    # header=None makes pandas label the columns with the integers 0..n-1.
    tsv_df = pd.read_csv(tsv_file, sep='\t', header=None)

    if tsv_df.shape[1] < 9:
        raise ValueError(f"File {tsv_file} does not have at least 9 columns.")
    if masterlist_df.shape[1] < 2:
        raise ValueError("Masterlist must have at least 2 columns.")

    # Build a key -> value lookup. Rows with a missing key or value can never
    # produce a match, and a repeated key keeps its first usable value; a
    # unique index guarantees one result per masterlist row (a left merge
    # would instead emit one output row per duplicate).
    lookup = (
        tsv_df[[7, 8]]
        .dropna(subset=[7, 8])
        .drop_duplicates(subset=[7], keep='first')
        .set_index(7)[8]
    )

    # Address the masterlist columns by position so the caller's frame (and
    # its column names) stays untouched.
    primary_ids = masterlist_df.iloc[:, 0].reset_index(drop=True)
    fallback_ids = masterlist_df.iloc[:, 1].reset_index(drop=True)

    # Prefer the match on the first column; fill the gaps from the second.
    match = primary_ids.map(lookup).combine_first(fallback_ids.map(lookup))

    return match.fillna('NA').rename('match')

def main():
    """Match every ``.tsv`` file in the working directory against the masterlist.

    Reads 'master_list.csv', adds one column per TSV file (named after the
    file) holding the values returned by ``process_tsv_file``, and writes the
    accumulated table to 'GSE193372part2output.csv'.
    """
    masterlist_file = 'master_list.csv'
    output_file = 'GSE193372part2output.csv'

    masterlist_df = pd.read_csv(masterlist_file)

    # Results are collected on a copy so the masterlist itself stays as read.
    output_df = masterlist_df.copy()

    # os.listdir returns names in arbitrary order; sorting makes the order of
    # the result columns reproducible from run to run.
    tsv_files = sorted(name for name in os.listdir('.') if name.endswith('.tsv'))

    for tsv_file in tsv_files:
        print(f"Processing file: {tsv_file}")

        results = process_tsv_file(tsv_file, masterlist_df)
        output_df[tsv_file] = results

        # The output is rewritten after every file, so the columns finished
        # so far are already on disk if a later file fails.
        output_df.to_csv(output_file, index=False)
        print(f"Results for {tsv_file} written to {output_file}")

# Run the batch job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


